From 8978cc08d9353ace405f1a2d65c3fef963432fa6 Mon Sep 17 00:00:00 2001 From: Keir Fraser Date: Thu, 26 Nov 2009 11:00:49 +0000 Subject: [PATCH] Implement rdtscp emulation and rdtscp_aux "support" The rdtscp instruction (and the associated TSC_AUX msr) are present on most recent AMD processors, and on the Nehalem and future Intel processors. Cpuid has a bit to detect the presence of this feature. Xen intentionally does not expose the cpuid rdtscp bit to PV OS's or to HVM guests, but PV apps can see this bit and, as a result, may choose to use the rdtscp instruction. When a PV guest with such an app is migrated to a machine that does not have rdtscp support, the app will get killed due to an invalid op. Fix this by emulating the rdtscp instruction. We also need to emulate rdtscp in the case where the machine has rdtscp support, but rdtsc emulation is enabled (which is unfortunately a different path: a privileged op). The rdtscp instruction reads the TSC_AUX register which presumably is set by the OS (and, in the case of tsc_mode==pvrdtscp, will be set by Xen). HV Linux and PV Linux will not set TSC_AUX because the cpuid rdtscp bit is not propagated by Xen; I'm told that Windows always sets TSC_AUX to zero. So for PV guests running on rdtscp-capable hardware (that don't use tsc_mode==pvrdtscp), always set TSC_AUX to zero. 
Signed-off-by: Dan Magenheimer --- xen/arch/x86/time.c | 18 +++++++++++++----- xen/arch/x86/traps.c | 36 ++++++++++++++++++++++++++++++++---- xen/include/asm-x86/time.h | 2 +- 3 files changed, 46 insertions(+), 10 deletions(-) diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c index 34d3f8350a..ffe1ab8187 100644 --- a/xen/arch/x86/time.c +++ b/xen/arch/x86/time.c @@ -851,9 +851,13 @@ static void __update_vcpu_system_time(struct vcpu *v, int force) else tsc_stamp = t->local_tsc_stamp; - if ( d->arch.tsc_mode == TSC_MODE_PVRDTSCP && - boot_cpu_has(X86_FEATURE_RDTSCP) ) - write_rdtscp_aux(d->arch.incarnation); + if ( boot_cpu_has(X86_FEATURE_RDTSCP) ) + { + if ( d->arch.tsc_mode == TSC_MODE_PVRDTSCP ) + write_rdtscp_aux(d->arch.incarnation); + else + write_rdtscp_aux(0); + } /* Don't bother unless timestamps have changed or we are forced. */ if ( !force && (u->tsc_timestamp == tsc_stamp) ) @@ -1608,7 +1612,7 @@ void tsc_check_reliability(void) * PV SoftTSC Emulation. */ -void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs) +void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs, int rdtscp) { s_time_t now = get_s_time(); struct domain *d = v->domain; @@ -1633,6 +1637,10 @@ void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs) regs->eax = (uint32_t)now; regs->edx = (uint32_t)(now >> 32); + + if ( rdtscp ) + regs->ecx = + (d->arch.tsc_mode == TSC_MODE_PVRDTSCP) ? 
d->arch.incarnation : 0; } static int host_tsc_is_safe(void) @@ -1826,7 +1834,7 @@ static void dump_softtsc(unsigned char key) printk(",khz=%"PRIu32"",d->arch.tsc_khz); if ( d->arch.incarnation ) printk(",inc=%"PRIu32"",d->arch.incarnation); - if ( !d->arch.vtsc ) + if ( !(d->arch.vtsc_kerncount | d->arch.vtsc_usercount) ) { printk("\n"); continue; diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 174dc25af4..9ccb769748 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -831,6 +831,26 @@ static void pv_cpuid(struct cpu_user_regs *regs) regs->edx = d; } +static int emulate_invalid_rdtscp(struct cpu_user_regs *regs) +{ + char opcode[3]; + unsigned long eip, rc; + struct vcpu *v = current; + + eip = regs->eip; + if ( (rc = copy_from_user(opcode, (char *)eip, sizeof(opcode))) != 0 ) + { + propagate_page_fault(eip + sizeof(opcode) - rc, 0); + return EXCRET_fault_fixed; + } + if ( memcmp(opcode, "\xf\x1\xf9", sizeof(opcode)) ) + return 0; + eip += sizeof(opcode); + pv_soft_rdtsc(v, regs, 1); + instruction_done(regs, eip, 0); + return EXCRET_fault_fixed; +} + static int emulate_forced_invalid_op(struct cpu_user_regs *regs) { char sig[5], instr[2]; @@ -879,7 +899,8 @@ asmlinkage void do_invalid_op(struct cpu_user_regs *regs) if ( likely(guest_mode(regs)) ) { - if ( !emulate_forced_invalid_op(regs) ) + if ( !emulate_invalid_rdtscp(regs) && + !emulate_forced_invalid_op(regs) ) do_guest_trap(TRAP_invalid_op, regs, 0); return; } @@ -2009,11 +2030,12 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) twobyte_opcode: /* - * All two-byte opcodes, except RDTSC (0x31) are executable only from - * guest kernel mode (virtual ring 0). + * All 2 and 3 byte opcodes, except RDTSC (0x31) and RDTSCP (0x1,0xF9) + * are executable only from guest kernel mode (virtual ring 0). 
*/ opcode = insn_fetch(u8, code_base, eip, code_limit); if ( !guest_kernel_mode(v, regs) && + (opcode != 0x1) && /* always emulate rdtscp */ !((opcode == 0x31) && v->domain->arch.vtsc) ) goto fail; @@ -2021,6 +2043,12 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) goto fail; switch ( opcode ) { + case 0x1: /* RDTSCP */ + if ( insn_fetch(u8, code_base, eip, code_limit) != 0xf9 ) + goto fail; + pv_soft_rdtsc(v, regs, 1); + break; + case 0x06: /* CLTS */ (void)do_fpu_taskswitch(0); break; @@ -2269,7 +2297,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) } case 0x31: /* RDTSC */ - pv_soft_rdtsc(v, regs); + pv_soft_rdtsc(v, regs, 0); break; case 0x32: /* RDMSR */ diff --git a/xen/include/asm-x86/time.h b/xen/include/asm-x86/time.h index 6dd071d726..6a9b9df671 100644 --- a/xen/include/asm-x86/time.h +++ b/xen/include/asm-x86/time.h @@ -59,7 +59,7 @@ int pit_broadcast_is_available(void); uint64_t acpi_pm_tick_to_ns(uint64_t ticks); uint64_t ns_to_acpi_pm_tick(uint64_t ns); -void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs); +void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs, int rdtscp); void tsc_set_info(struct domain *d, uint32_t tsc_mode, uint64_t elapsed_nsec, uint32_t gtsc_khz, uint32_t incarnation); -- 2.30.2